In [2]:
import copy, pathlib, math
import PIL.Image as Image

import sklearn
import numpy
import matplotlib
import matplotlib.pyplot as plt

print("scikit-learn version:", sklearn.__version__)     # 1.1.3
print("numpy version:", numpy.__version__)              # 1.23.4
print("matplotlib version:", matplotlib.__version__)    # 3.6.2
scikit-learn version: 1.6.1
numpy version: 1.23.5
matplotlib version: 3.10.0
In [3]:
import numpy as np

size = 256

def load_image(file, size):
    img = Image.open(file).convert("RGB")  # Convert all images to RGB
    img = img.resize((size, size))
    return np.array(img).flatten() 

def load_labelled_images(path, size):
    labels = list()
    files = list()
    for file_info in pathlib.Path(path).glob("**/*"):
        if file_info.suffix.lower() in ['.jpg', '.jpeg', '.png']:  
            labels.append(file_info.parent.name)  
            files.append(str(file_info))
    imgs = numpy.array([load_image(f, size) for f in files])
    return imgs, numpy.array(labels)      

images, labels = load_labelled_images("./car_dataset", size)
print("Loaded", len(images), "images in the following", len(numpy.unique(labels)), "classes:")
for label in numpy.unique(labels):
    print(label)
Loaded 179 images in the following 3 classes:
audi
bmw
mercedes
In [7]:
sample_size = 24


plotimgs = copy.deepcopy(images)
numpy.random.shuffle(plotimgs)
rows = plotimgs[:sample_size]

_, subplots = plt.subplots(nrows = math.ceil(sample_size/8), ncols = 8, figsize=(18, int(sample_size/3)))
subplots = subplots.flatten()
for i, x in enumerate(rows):
    subplots[i].imshow(numpy.reshape(x, [size, size, 3]))
    subplots[i].set_xticks([])
    subplots[i].set_yticks([])
No description has been provided for this image
In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=.3, random_state=0)
In [9]:
from sklearn.svm import SVC
model = SVC()
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print("Accuracy:", score)
Accuracy: 0.3148148148148148
In [10]:
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
report = classification_report(y_test, predictions)
print(report)
              precision    recall  f1-score   support

        audi       0.39      0.67      0.49        21
         bmw       0.17      0.21      0.19        14
    mercedes       0.00      0.00      0.00        19

    accuracy                           0.31        54
   macro avg       0.19      0.29      0.23        54
weighted avg       0.19      0.31      0.24        54

C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\sklearn\metrics\_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\sklearn\metrics\_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\sklearn\metrics\_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
In [11]:
_, subplots = plt.subplots(nrows = math.ceil(len(X_test)/4), ncols = 4, figsize=(15, len(X_test)))
subplots = subplots.flatten()

for i, x in enumerate(X_test):
    subplots[i].imshow(numpy.reshape(x, [size, size, 3]))
    subplots[i].set_xticks([])
    subplots[i].set_yticks([])
    subplots[i].set_title(predictions[i] + (" (correct)" if predictions[i] == y_test[i] else " (wrong)"))
No description has been provided for this image
In [12]:
from sklearn.svm import SVC

for C_value in [0.5, 1.0, 2.0]:
    model = SVC(C=C_value)
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    print(f"Accuracy with C={C_value}: {score:.4f}")
Accuracy with C=0.5: 0.3889
Accuracy with C=1.0: 0.3148
Accuracy with C=2.0: 0.2778
In [13]:
kernels = ['linear', 'poly', 'rbf', 'sigmoid']

for kernel in kernels:
    model = SVC(kernel=kernel)
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    print(f"Accuracy with {kernel} kernel: {score:.4f}")
Accuracy with linear kernel: 0.2963
Accuracy with poly kernel: 0.3148
Accuracy with rbf kernel: 0.3148
Accuracy with sigmoid kernel: 0.3704